library(haven)
library(dplyr)
library(scales)

# Directory that holds the raw survey files read below (dfile_2710.sav,
# dfile_2825.sav, dfile_2829.sav, dfile_3024.sav). Edit this path before
# running; the default "." keeps the current working directory.
#
# The raw survey data used in this study were obtained from the Korean Social
# Science Data Center (KSDC). Due to data use restrictions, we cannot
# redistribute the dataset directly. Authorized users may access the original
# data at https://www.ksdc.re.kr/ (institutional login required).
data_dir <- "."
if (!dir.exists(data_dir)) {
  stop("Raw data directory not found: ", data_dir, call. = FALSE)
}
# Every read_sav()/write_sav() call in this script uses a path relative to the
# working directory, so point it at the data directory once, up front.
setwd(data_dir)

#####################################
##### Make April 2020 dataframe #####
#####################################
# Load the April 2020 wave. Keep only respondents whose Q15_9 and Q15_10
# answers are neither 96 nor 98 (filter() also drops rows where either item is
# NA). Then replace missing Q17_1 / Q17_2 answers with the sentinel 99 so the
# equality tests on those items never evaluate to NA.
apr2020 <- read_sav("dfile_2710.sav") %>%
  filter(Q15_9 != 96, Q15_9 != 98, Q15_10 != 96, Q15_10 != 98) %>%
  mutate(
    Q17_1 = replace(Q17_1, is.na(Q17_1), 99),
    Q17_2 = replace(Q17_2, is.na(Q17_2), 99)
  )

# Make new variables: IDEO and IDEO2 (IDEO2 is for the robustness check).
# Both are three-level factors (Lib, Con, Ind) derived from Q31_1:
#   IDEO : 0-4 -> "Lib", 6-10 -> "Con", anything else (incl. NA) -> "Ind"
#   IDEO2: 0-3 -> "Lib", 7-10 -> "Con", anything else (incl. NA) -> "Ind"
apr2020$IDEO <- factor(
  case_when(
    apr2020$Q31_1 %in% c(0, 1, 2, 3, 4) ~ "Lib",
    apr2020$Q31_1 %in% c(6, 7, 8, 9, 10) ~ "Con",
    TRUE ~ "Ind"
  ),
  levels = c("Lib", "Con", "Ind")
)

apr2020$IDEO2 <- factor(
  case_when(
    apr2020$Q31_1 %in% c(0, 1, 2, 3) ~ "Lib",
    apr2020$Q31_1 %in% c(7, 8, 9, 10) ~ "Con",
    TRUE ~ "Ind"
  ),
  levels = c("Lib", "Con", "Ind")
)

# Make new variables: PID and PIDlong
# Party labels built from Q17_1 and Q17_2 (their NAs were set to 99 right
# after loading, so none of the comparisons below can return NA). The first
# matching rule wins:
#   Q17_1 == 10               -> "Noparty"
#   either item == 2          -> "MTD"
#   either item == 1          -> "DEM"
#   either item == 4          -> "JUS"   (PIDlong only)
#   either item == 5          -> "ACS"   (PIDlong only)
#   everything else           -> "Other"
# The no-party label is spelled "Noparty" in both columns; PIDlong previously
# carried the garbled value "Noapr2020ty".
apr2020 <- apr2020 %>%
  mutate(
    PID = case_when(
      Q17_1 == 10 ~ "Noparty",
      Q17_1 == 2 | Q17_2 == 2 ~ "MTD",
      Q17_1 == 1 | Q17_2 == 1 ~ "DEM",
      TRUE ~ "Other"
    ),
    PIDlong = case_when(
      Q17_1 == 10 ~ "Noparty",
      Q17_1 == 2 | Q17_2 == 2 ~ "MTD",
      Q17_1 == 1 | Q17_2 == 1 ~ "DEM",
      Q17_1 == 4 | Q17_2 == 4 ~ "JUS",
      Q17_1 == 5 | Q17_2 == 5 ~ "ACS",
      TRUE ~ "Other"
    )
  )


# Make new variables: SORTING, SORTING2 and SORTINGlong
# Each is 1 when the ideology label and the party label line up, else 0:
#   SORTING     : IDEO  Lib + PID DEM,            or IDEO  Con + PID MTD
#   SORTING2    : IDEO2 Lib + PID DEM,            or IDEO2 Con + PID MTD
#   SORTINGlong : IDEO  Lib + PIDlong DEM or JUS, or IDEO  Con + PIDlong MTD or ACS
apr2020$SORTING <- ifelse(
  (apr2020$IDEO == "Lib" & apr2020$PID == "DEM") |
    (apr2020$IDEO == "Con" & apr2020$PID == "MTD"),
  1, 0
)
apr2020$SORTING2 <- ifelse(
  (apr2020$IDEO2 == "Lib" & apr2020$PID == "DEM") |
    (apr2020$IDEO2 == "Con" & apr2020$PID == "MTD"),
  1, 0
)
apr2020$SORTINGlong <- ifelse(
  (apr2020$IDEO == "Lib" & apr2020$PIDlong %in% c("DEM", "JUS")) |
    (apr2020$IDEO == "Con" & apr2020$PIDlong %in% c("MTD", "ACS")),
  1, 0
)
# Make new variables: AGE and GENDER
#   AGE    : 2020 minus AQ4_1 (presumably AQ4_1 is birth year - TODO confirm)
#   GENDER : factor; 0 when AQ3 == 1, 1 for any other non-missing value
apr2020 <- apr2020 %>%
  mutate(
    AGE = 2020 - AQ4_1,
    GENDER = as.factor(ifelse(AQ3 == 1, 0, 1))
  )


# Make new variable: EDUC
# SQ6_numeric is SQ6 as a plain number. SQ6_recoded shifts the codes so that
# 96 becomes the lowest category (1) and codes 1-9 become 2-10; it is stored
# as its own column.
apr2020 <- apr2020 %>%
  mutate(SQ6_numeric = as.numeric(SQ6),
         SQ6_recoded = recode(SQ6_numeric,
                              "96" = 1,  # Less than pre-school → 1
                              "1" = 2,   # Pre-school → 2
                              "2" = 3,   # Elementary  → 3
                              "3" = 4,   # Middle → 4
                              "4" = 5,   # Highschool → 5
                              "5" = 6,   # Highschool/Irregular → 6
                              "6" = 7,   # College → 7
                              "7" = 8,   # BA → 8
                              "8" = 9,   # Master → 9
                              "9" = 10)) # Phd → 10
# EDUC is the factor of the raw SQ6 codes, the same way EDUC is built from the
# raw education item in the other waves.
# NOTE(review): if the ordinal recode was meant to feed EDUC, use
# as.factor(apr2020$SQ6_recoded) here instead - confirm against the analysis.
apr2020$EDUC <- as.factor(apr2020$SQ6)

# Make new variable: INCOME (factor of the raw SQ9 codes)
apr2020$INCOME <- as.factor(apr2020$SQ9)

# Make new variables: AP (affective polarization), DEMODICT, GTD, Y and Y10
#   AP       : absolute gap between Q15_9 and Q15_10
#   DEMODICT : 0 when Q28 == 1, 1 for any other value (NA stays NA)
#   GTD      : 1 when Q25_7 is 1 or 2, otherwise 0 (NA also becomes 0 because
#              %in% never returns NA)
#   Y        : outcome variable, mean of DEMODICT and GTD
#   Y10      : raw outcome, Q28 + Q25_7 rescaled to the 0-1 interval
apr2020 <- apr2020 %>%
  mutate(
    AP = abs(Q15_9 - Q15_10),
    DEMODICT = ifelse(Q28 == 1, 0, 1),
    GTD = ifelse(Q25_7 %in% c(1, 2), 1, 0),
    Y = (DEMODICT + GTD) / 2,
    Y10 = rescale(Q28 + Q25_7, to = c(0, 1))
  )

# Save it
write_sav(apr2020, "apr2020_df.sav")

#####################################
##### Make April 2022 dataframe #####
#####################################
# Load the April 2022 wave and reverse code q35 / q36: a 1 becomes 2 and every
# other non-missing value becomes 1 (NA stays NA).
apr2022 <- read_sav("dfile_2825.sav") %>%
  mutate(
    q35 = ifelse(q35 == 1, 2, 1),
    q36 = ifelse(q36 == 1, 2, 1)
  )

# Make new variables: IDEO and IDEO2, both derived from q153
#   IDEO : 0-4 -> "Lib", 6-10 -> "Con", anything else (incl. NA) -> "Ind"
#   IDEO2: 0-3 -> "Lib", 7-10 -> "Con", anything else (incl. NA) -> "Ind"
apr2022$IDEO <- case_when(
  apr2022$q153 %in% c(0, 1, 2, 3, 4) ~ "Lib",
  apr2022$q153 %in% c(6, 7, 8, 9, 10) ~ "Con",
  TRUE ~ "Ind"
)
apr2022$IDEO2 <- case_when(
  apr2022$q153 %in% c(0, 1, 2, 3) ~ "Lib",
  apr2022$q153 %in% c(7, 8, 9, 10) ~ "Con",
  TRUE ~ "Ind"
)

# Make new variables: PID, SORTING and SORTING2
# PID uses the reverse-coded q35 / q36 together with q37; first match wins:
#   q35 == 1 and q37 == 1 -> "DEM"
#   q35 == 1 and q37 == 2 -> "PPP"
#   q35 == 2 and q36 == 2 -> "NoParty"
#   otherwise             -> "Others"
# ifelse() is used on purpose: a comparison that evaluates to NA yields NA for
# that respondent instead of falling through to the next rule.
# SORTING / SORTING2 are 1 when ideology (IDEO / IDEO2) and PID line up
# (Lib with DEM, or Con with PPP), else 0.
apr2022 <- apr2022 %>%
  mutate(
    PID = ifelse(
      q35 == 1 & q37 == 1, "DEM",
      ifelse(
        q35 == 1 & q37 == 2, "PPP",
        ifelse(q35 == 2 & q36 == 2, "NoParty", "Others")
      )
    ),
    SORTING = ifelse(
      (IDEO == "Lib" & PID == "DEM") | (IDEO == "Con" & PID == "PPP"),
      1, 0
    ),
    SORTING2 = ifelse(
      (IDEO2 == "Lib" & PID == "DEM") | (IDEO2 == "Con" & PID == "PPP"),
      1, 0
    )
  )
# Make new variables: AGE, GENDER, EDUC and INCOME
#   AGE    : copy of q2_1
#   GENDER : factor; 0 when q1 == 1, 1 for any other non-missing value
#   EDUC   : factor of the raw q187 codes
#   INCOME : factor of the raw q192 codes
apr2022 <- apr2022 %>%
  mutate(
    AGE = q2_1,
    GENDER = as.factor(ifelse(q1 == 1, 0, 1)),
    EDUC = as.factor(q187),
    INCOME = as.factor(q192)
  )

# Make new variables: DEMODICT, GTD, DICT, Y, Y10 and AP
#   DEMODICT : 0 when q141 == 1, 1 for any other value (NA stays NA)
#   GTD      : 0 when q85 is 1 or 2, otherwise 1 (NA also becomes 1 because
#              %in% never returns NA)
#   DICT     : 0 when q87 is 1 or 2, otherwise 1 (same NA behaviour as GTD)
#   Y        : mean of the three binary indicators
#   Y10      : q141 + q85 + q87 rescaled to the 0-1 interval
#   AP       : affective polarization, absolute gap between q95 and q96
apr2022 <- apr2022 %>%
  mutate(
    DEMODICT = ifelse(q141 == 1, 0, 1),
    GTD = ifelse(q85 %in% c(1, 2), 0, 1),
    DICT = ifelse(q87 %in% c(1, 2), 0, 1),
    Y = (DEMODICT + GTD + DICT) / 3,
    Y10 = rescale(q141 + q85 + q87, to = c(0, 1)),
    AP = abs(q95 - q96)
  )

# Save it
write_sav(apr2022, "apr2022_df.sav")

#####################################
##### Make June 2022 dataframe ######
#####################################
# Load the June 2022 wave and remove respondents whose q36, q37 or q95 answer
# is coded 98 (rows with NA on any of these items are dropped by filter() too).
# Then make new variables IDEO and IDEO2 from q95:
#   IDEO : 0-4 -> "Lib", 6-10 -> "Con", anything else -> "Ind"
#   IDEO2: 0-3 -> "Lib", 7-10 -> "Con", anything else -> "Ind"
jun2022 <- read_sav("dfile_2829.sav") %>%
  filter(q36 != 98, q37 != 98, q95 != 98) %>%
  mutate(
    IDEO = case_when(
      q95 %in% c(0, 1, 2, 3, 4) ~ "Lib",
      q95 %in% c(6, 7, 8, 9, 10) ~ "Con",
      TRUE ~ "Ind"
    ),
    IDEO2 = case_when(
      q95 %in% c(0, 1, 2, 3) ~ "Lib",
      q95 %in% c(7, 8, 9, 10) ~ "Con",
      TRUE ~ "Ind"
    )
  )

# Make new variables: PID, PID2, SORTING and SORTING2
# The steps inside mutate() run top to bottom and their order matters:
#   1. missing q50 -> 99
#   2. PID, first match wins:
#        q48 == 1 and q49 == 1  -> "DEM"
#        q48 == 2 and q50 == 1  -> "DEM"
#        q48 == 1 and q49 == 2  -> "PPP"
#        q48 == 2 and q50 == 2  -> "PPP"
#        q48 == 2 and q50 == 99 -> "NoParty" (q50 was missing)
#        otherwise              -> "Others"
#   3. missing q49 -> 99 (NOTE: done only after PID, so PID is NA for a
#      respondent with q48 == 1 and a missing q49)
#   4. PID2 from q49 alone: 1 -> "DEM", 2 -> "PPP", 3 -> "JUS", else "Others"
#   5. SORTING / SORTING2: 1 when IDEO / IDEO2 and PID line up (Lib with DEM,
#      or Con with PPP), else 0
jun2022 <- jun2022 %>%
  mutate(
    q50 = replace(q50, is.na(q50), 99),
    PID = ifelse(
      q48 == 1 & q49 == 1, "DEM",
      ifelse(
        q48 == 2 & q50 == 1, "DEM",
        ifelse(
          q48 == 1 & q49 == 2, "PPP",
          ifelse(
            q48 == 2 & q50 == 2, "PPP",
            ifelse(q48 == 2 & q50 == 99, "NoParty", "Others")
          )
        )
      )
    ),
    q49 = replace(q49, is.na(q49), 99),
    PID2 = case_when(
      q49 == 1 ~ "DEM",
      q49 == 2 ~ "PPP",
      q49 == 3 ~ "JUS",
      TRUE ~ "Others"
    ),
    SORTING = ifelse(
      (IDEO == "Lib" & PID == "DEM") | (IDEO == "Con" & PID == "PPP"),
      1, 0
    ),
    SORTING2 = ifelse(
      (IDEO2 == "Lib" & PID == "DEM") | (IDEO2 == "Con" & PID == "PPP"),
      1, 0
    )
  )

# Make new variables: AGE, GENDER, EDUC and INCOME
#   AGE    : copy of q2_1
#   GENDER : factor; 0 when q1 == 1, 1 for any other non-missing value
#   EDUC   : factor of the raw q230 codes
#   INCOME : factor of the raw q235 codes
jun2022 <- jun2022 %>%
  mutate(
    AGE = q2_1,
    GENDER = as.factor(ifelse(q1 == 1, 0, 1)),
    EDUC = as.factor(q230),
    INCOME = as.factor(q235)
  )

# Make new variables: AP, DEMODICT, GTD (get things done), DICT
# (dictatorship), Y and Y10
#   AP       : absolute gap between q36 and q37
#   DEMODICT : 0 when q90 == 1, 1 for any other value (NA stays NA)
#   GTD      : 0 when q98 is 1 or 2, otherwise 1 (NA also becomes 1 because
#              %in% never returns NA)
#   DICT     : 0 when q102 is 1 or 2, otherwise 1 (same NA behaviour as GTD)
#   Y        : mean of the three binary indicators
#   Y10      : q90 + q98 + q102 rescaled to the 0-1 interval
jun2022 <- jun2022 %>%
  mutate(
    AP = abs(q36 - q37),
    DEMODICT = ifelse(q90 == 1, 0, 1),
    GTD = ifelse(q98 %in% c(1, 2), 0, 1),
    DICT = ifelse(q102 %in% c(1, 2), 0, 1),
    Y = (DEMODICT + DICT + GTD) / 3,
    Y10 = rescale(q90 + q98 + q102, to = c(0, 1))
  )

# Frequency checks of the three binary indicators
table(jun2022$DEMODICT)
table(jun2022$GTD)
table(jun2022$DICT)

# Save it
write_sav(jun2022, "jun2022_df.sav")

#####################################
##### Make April 2024 dataframe #####
#####################################

# Load the April 2024 wave.
apr2024 <- read_sav("dfile_3024.sav")

# Make new variables: IDEO and IDEO2, both derived from q125
#   IDEO : 0-4 -> "Lib", 6-10 -> "Con", anything else (incl. NA) -> "Ind"
#   IDEO2: 0-3 -> "Lib", 7-10 -> "Con", anything else (incl. NA) -> "Ind"
apr2024$IDEO <- case_when(
  apr2024$q125 %in% c(0, 1, 2, 3, 4) ~ "Lib",
  apr2024$q125 %in% c(6, 7, 8, 9, 10) ~ "Con",
  TRUE ~ "Ind"
)
apr2024$IDEO2 <- case_when(
  apr2024$q125 %in% c(0, 1, 2, 3) ~ "Lib",
  apr2024$q125 %in% c(7, 8, 9, 10) ~ "Con",
  TRUE ~ "Ind"
)

# Make new variables: PID, PID2, SORTING and SORTING2
# PID, first match wins:
#   (q59 == 1 and q57 == 1) or (q58 == 1 and q57 == 2) -> "DEM"
#   (q59 == 2 and q57 == 1) or (q58 == 2 and q57 == 2) -> "PPP"
#   q57 == 2 and q58 == 10                             -> "NoParty"
#   otherwise                                          -> "Other"
# PID2 is the same except that DEM / PPP require q57 == 1 (the q58 route is
# not used). ifelse() is used on purpose: a comparison that evaluates to NA
# yields NA for that respondent instead of falling through to the next rule.
# SORTING / SORTING2 are 1 when IDEO / IDEO2 and PID line up (Lib with DEM,
# or Con with PPP), else 0.
apr2024 <- apr2024 %>%
  mutate(
    PID = ifelse(
      (q59 == 1 & q57 == 1) | (q58 == 1 & q57 == 2), "DEM",
      ifelse(
        (q59 == 2 & q57 == 1) | (q58 == 2 & q57 == 2), "PPP",
        ifelse(q57 == 2 & q58 == 10, "NoParty", "Other")
      )
    ),
    PID2 = ifelse(
      q59 == 1 & q57 == 1, "DEM",
      ifelse(
        q59 == 2 & q57 == 1, "PPP",
        ifelse(q57 == 2 & q58 == 10, "NoParty", "Other")
      )
    ),
    SORTING = ifelse(
      (IDEO == "Lib" & PID == "DEM") | (IDEO == "Con" & PID == "PPP"),
      1, 0
    ),
    SORTING2 = ifelse(
      (IDEO2 == "Lib" & PID == "DEM") | (IDEO2 == "Con" & PID == "PPP"),
      1, 0
    )
  )

# Make new variables: AGE, GENDER, EDUC and INCOME
#   AGE    : copy of q4_1
#   GENDER : factor; 0 when q3 == 1, 1 for any other non-missing value
#   EDUC   : factor of the raw q179 codes
#   INCOME : factor of the raw q182 codes
apr2024 <- apr2024 %>%
  mutate(
    AGE = q4_1,
    GENDER = as.factor(ifelse(q3 == 1, 0, 1)),
    EDUC = as.factor(q179),
    INCOME = as.factor(q182)
  )

# Make new variables: DEMODICT, GTD, GTDnew, DICT, DICTnew, Y, Ynew and Y10
#   DEMODICT : 0 when q95 == 1, 1 for any other value (NA stays NA)
#   GTD      : 0 when q165 is 1 or 2, otherwise 1 (NA also becomes 1 because
#              %in% never returns NA)
#   GTDnew   : 0 when q165 is 1 or 2, 1 when it is 4 or 5, NA for anything else
#   DICT     : 0 when q167 is 1 or 2, otherwise 1 (same NA behaviour as GTD)
#   DICTnew  : 0 when q167 is 1 or 2, 1 when it is 4 or 5, NA for anything else
#   Y        : mean of GTD, DICT and DEMODICT
#   Ynew     : mean of GTDnew, DICTnew and DEMODICT (NA if any component is NA)
#   Y10      : q95 + q165 + q167 rescaled to the 0-1 interval
apr2024 <- apr2024 %>%
  mutate(
    DEMODICT = ifelse(q95 == 1, 0, 1),
    GTD = ifelse(q165 %in% c(1, 2), 0, 1),
    GTDnew = ifelse(
      q165 %in% c(1, 2), 0,
      ifelse(q165 %in% c(4, 5), 1, NA)
    ),
    DICT = ifelse(q167 %in% c(1, 2), 0, 1),
    DICTnew = ifelse(
      q167 %in% c(1, 2), 0,
      ifelse(q167 %in% c(4, 5), 1, NA)
    ),
    Y = (GTD + DICT + DEMODICT) / 3,
    Ynew = (GTDnew + DICTnew + DEMODICT) / 3,
    Y10 = rescale(q95 + q165 + q167, to = c(0, 1))
  )

# Quick diagnostics: frequency table of DICT and histograms of both outcomes
table(apr2024$DICT)
hist(apr2024$Y)
hist(apr2024$Ynew)

# Make new variable: Affective Polarization
# AP is the absolute gap between the mean of (q46, q49, q52) and the mean of
# (q45, q47, q53).
# NOTE(review): AP is computed before any 96/98 screening, and only q52 / q53
# are screened below - confirm the other four items cannot carry such codes.
apr2024 <- apr2024 %>%
  mutate(AP = abs((q46 + q49 + q52) / 3 - (q45 + q47 + q53) / 3))

# Make new variable: Affective Polarization2 --> report this as the main
# Respondents whose q52 or q53 answer is 96 or 98 (or NA) are removed from the
# data frame here, i.e. after every variable above has already been built;
# AP2 is then the absolute gap between q52 and q53.
apr2024 <- apr2024 %>%
  filter(q52 != 96 & q52 != 98 & q53 != 96 & q53 != 98)
apr2024$AP2 <- abs(apr2024$q52 - apr2024$q53)

# Save it
write_sav(apr2024, "apr2024_df.sav")

